Introduction

library(lmerTest)
library(lmtest)
library(ggfortify)
library(glmmTMB)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(gridExtra)
library(grid)
library(qqman)
library(pheatmap)

There are a total of 80 samples for the AD model J20 with half samples from each tissue, Cortex and Hippocampus. The table below summaries the equal distribution of samples across genotype as well. Combining the two brain regions may increase power to detect small differences had we analysed within tissues seperetely.

There are 41 mouse - 2 which have one brain region and 39 which have both brain regions. 95% of samples have matched brains.

d <- table(pheno$Tissue, pheno$Genotype)
grid.table(d)

Mixed Effects Models

\[{Methylation } = {Tissue + Tissue*Genotype + Genotype + Age + Pathology + Tissue*Pathology + (1|Mouse)}\]

This model will run through each probe producing stats for each site across the mouse genome. Age and tissue are added as covariates.

  • Genotype - evaluates consistent genotype effects across tissues
  • Tissue*Genotype - picks up genotype effects that are different between tissues
  • Pathology - - consistent pathological effects across tissues
  • Tissue*Pathology - different pathological effects between tissues
  • (1|Mouse) - this term controls for repeated measures in model

From the analysis of 23,633 probes, 1 failed to fit the model and are removed from further analysis. This issue is most likely caused by the Tiisue*Pathology term as this had not occured without this term included …

J20 <- read.csv("/gpfs/ts0/projects/Research_Project-191406/Aisha/data/Array/MixedModelResultswPathology_J20.csv",header=T, stringsAsFactors = F)
colnames(J20)[colnames(J20) == 'X'] <- 'cpg'
mm10_Manifest <- read.csv("/gpfs/ts0/projects/Research_Project-191406/Aisha/data/Array/mm10_Manifest.csv", stringsAsFactors = F, header = T, row.names = 1)

# Add annotation files - chr and bp
probe_mapping_file <- read_csv("/gpfs/ts0/projects/Research_Project-191406/Aisha/data/Array/HorvathMammalChip_SpeciesGenomeCoordsUnique_v1.0.csv", col_types = cols(.default = "c"))
species_mappability_name = "MusMusculus"
species_probes <- probe_mapping_file[, c('probeID','MusMusculus')]
species_probes <- as.data.frame(species_probes[!is.na(species_probes$MusMusculus),]) #23633
species_probes$Chr <- gsub(":.*","",species_probes$MusMusculus)
species_probes$Bp <- gsub(".*:","",species_probes$MusMusculus)

J20$Chr <- species_probes$Chr[match(species_probes$probeID, J20[,"cpg"])]
J20$Bp <- species_probes$Bp[match(species_probes$probeID, J20[,"cpg"])]
# Convert x and y to numbers and remove non autosomal
J20<- J20[-which(J20$Chr %in% c("CHR_MG51_PATCH", "CHR_MG4200_PATCH","CHR_MG3699_PATCH")),]
J20$Chr <- gsub("X", 20, J20$Chr)
J20$Chr <- gsub("Y", 21, J20$Chr)
J20$Chr <- as.numeric(J20$Chr)
J20$Bp <- as.numeric(J20$Bp)
J20$Gene_Symbol <- mm10_Manifest$Gene_Symbol[match(J20$cpg, mm10_Manifest$cpg)]
rownames(J20) <- J20$cpg
J20$SNP <- J20$Gene_Symbol

##J20 - 26,629 rows
threshold <- 0.05
color_J20_TG <-"#FF5A62"
# filters for 1 probe that did not converge with the model
na_df <- as.data.frame(J20[rowSums(is.na(J20)) > 0,]) 
na_df <- na_df[,-c(29:34)]
na_df <- as.data.frame(na_df[rowSums(is.na(na_df)) > 0,]) 

# remove the 1 probe
J20 <- J20[setdiff(rownames(J20),rownames(na_df)),]

#J20 - 23,625
J20$FDR_adj_GenotypeTG <- p.adjust(J20[,"PrZ.GenotypeTG"], method = "fdr")
J20$FDR_adj_TissueGenotype <- p.adjust(J20[,"PrZ.TissueHippocampus.GenotypeTG"], method = "fdr")
J20$FDR_adj_Pathology <- p.adjust(J20[,"PrZ.Pathology"], method = "fdr")
J20$FDR_adj_TissuePathology <- p.adjust(J20[,"PrZ.TissueHippocampus.Pathology"], method = "fdr")

significant_tbl <- c()

significant_tbl[1] <- nrow(J20[which(J20$FDR_adj_GenotypeTG < 0.05),])     #53
significant_tbl[2] <- nrow(J20[which(J20$FDR_adj_TissueGenotype < 0.05),]) #1702
significant_tbl[3] <- nrow(J20[which(J20$FDR_adj_Pathology < 0.05),])  #162
significant_tbl[4] <- nrow(J20[which(J20$FDR_adj_TissuePathology < 0.05),])  #38

significant_tbl <- as.data.frame(t(significant_tbl))
rownames(significant_tbl) <- c("Significant Sites")
colnames(significant_tbl) <- c("Genotype", "Tissue*Genotype", "Pathology", "Tissue*Pathology")
grid.table(significant_tbl)

Genotype

#### Raw p values
lamda <- qchisq(1-median(J20$PrZ.GenotypeTG),1)/qchisq(0.5,1)
qq(J20$PrZ.GenotypeTG, main = "Genotype raw pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

#### FDR corrected qqplots
lamda <- qchisq(1-median(J20$FDR_adj_GenotypeTG),1)/qchisq(0.5,1)
qq(J20$FDR_adj_GenotypeTG, main = "Genotype FDR corrected pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

manhattan(J20, p = "FDR_adj_GenotypeTG", bp = "Bp", chr = "Chr", 
          genomewide = -log10(threshold), suggestiveline = FALSE, 
          logp=T, col = c("black", color_J20_TG), main = "Genotype",
          chrlabs = c(1:19, "X", "Y"),las = 2, annotatePval = threshold,
          cex.axis = 1.5, cex.lab = 1.2)

#Zbtb20 is distorting the manhattan plot so we can remove tht for now
J20 <- J20[order(J20$PrZ.GenotypeTG),]
J20_2 <- J20[-1,]
manhattan(J20_2, p = "FDR_adj_GenotypeTG", bp = "Bp", chr = "Chr", 
          genomewide = -log10(threshold), suggestiveline = FALSE, 
          logp=T, col = c("black", color_J20_TG), main = "Genotype (Zbtb20 removed)",
          chrlabs = c(1:19, "X", "Y"),las = 2, annotatePval = threshold,
          cex.axis = 1.5, cex.lab = 1.2)

J20 <- J20[order(J20$FDR_adj_GenotypeTG),]
par(mfrow = c(2, 3))
for(i in 1:6){
  plotTissueGenotypeDMP(cpg = rownames(J20[i,]), betaResults = J20, betas = betas, pheno = pheno, 
                        colour = color_J20_TG, column= "FDR_adj_GenotypeTG")
}

J20_sig <- J20[1:length(J20[which(J20$FDR_adj_GenotypeTG <= 0.05), "cpg"]),]
betas_sig <- betas[rownames(J20_sig),]
pheno_plot <- pheno[,c("Tissue","Genotype", "Age_months", "Pathology")]

pheatmap(betas_sig,  
         annotation_col = pheno_plot,
         cluster_rows = T,
         show_rownames = F,
         show_colnames = F)

Tissue*Genotype

#### Raw p values
lamda <- qchisq(1-median(J20$PrZ.TissueHippocampus.GenotypeTG),1)/qchisq(0.5,1)
qq(J20$PrZ.TissueHippocampus.GenotypeTG, main = "Tissue*Genotype raw pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

#### FDR corrected qqplots
lamda <- qchisq(1-median(J20$FDR_adj_TissueGenotype),1)/qchisq(0.5,1)
qq(J20$FDR_adj_TissueGenotype, main = "Tissue*Genotype FDR corrected pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

manhattan(J20, p = "FDR_adj_TissueGenotype", bp = "Bp", chr = "Chr", 
          genomewide = -log10(threshold), suggestiveline = FALSE, 
          logp=T, col = c("black", color_J20_TG), main = "Tissue*Genotype",
          chrlabs = c(1:19, "X", "Y"),las = 2, annotatePval = threshold,
          cex.axis = 1.5, cex.lab = 1.2)

J20 <- J20[order(J20$FDR_adj_TissueGenotype),]
par(mfrow = c(2, 3))
for(i in 1:6){
  plotTissueGenotypeDMP(cpg = rownames(J20[i,]), betaResults = J20, betas = betas, pheno = pheno, 
                        colour = color_J20_TG, column= "FDR_adj_TissueGenotype")
}

J20_sig <- J20[1:length(J20[which(J20$FDR_adj_TissueGenotype <= 0.05), "cpg"]),]
betas_sig <- betas[rownames(J20_sig),]
pheno_plot <- pheno[,c("Tissue","Genotype", "Age_months", "Pathology")]

pheatmap(betas_sig,  
         annotation_col = pheno_plot,
         cluster_rows = T,
         show_rownames = F,
         show_colnames = F)

Pathology

#### Raw p values
lamda <- qchisq(1-median(J20$PrZ.Pathology),1)/qchisq(0.5,1)
qq(J20$PrZ.Pathology, main = "Pathology raw pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

#### FDR corrected qqplots
lamda <- qchisq(1-median(J20$FDR_adj_Pathology),1)/qchisq(0.5,1)
qq(J20$FDR_adj_Pathology, main = "Pathology FDR corrected pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

manhattan(J20, p = "FDR_adj_Pathology", bp = "Bp", chr = "Chr", 
          genomewide = -log10(threshold), suggestiveline = FALSE, 
          logp=T, col = c("black", color_J20_TG), main = "Pathology",
          chrlabs = c(1:19, "X", "Y"),las = 2, annotatePval = threshold,
          cex.axis = 1.5, cex.lab = 1.2)

J20 <- J20[order(J20$FDR_adj_Pathology),]
par(mfrow = c(2, 3))
for(i in 1:6){
  plotPathologyDMP(cpg = rownames(J20[i,]), betaResults = J20, betas = betas, pheno = pheno, 
                   colour = color_J20_TG, column= "FDR_adj_Pathology")
}

J20_sig <- J20[1:length(J20[which(J20$FDR_adj_Pathology <= 0.05), "cpg"]),]
betas_sig <- betas[rownames(J20_sig),]
pheno_plot <- pheno[,c("Tissue","Genotype", "Age_months", "Pathology")]

pheatmap(betas_sig,  
         annotation_col = pheno_plot,
         cluster_rows = T,
         show_rownames = F,
         show_colnames = F)

Tissue*Pathology

#### Raw p values
lamda <- qchisq(1-median(J20$PrZ.TissueHippocampus.Pathology),1)/qchisq(0.5,1)
qq(J20$PrZ.TissueHippocampus.Pathology, main = "Tissue*Pathology raw pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

#### FDR corrected qqplots
lamda <- qchisq(1-median(J20$FDR_adj_TissuePathology),1)/qchisq(0.5,1)
qq(J20$FDR_adj_TissuePathology, main = "Tissue*Pathology FDR corrected pvalues")
mtext(paste("Lamda = ", signif(lamda,3)), side = 3, adj = 1)

manhattan(J20, p = "FDR_adj_TissuePathology", bp = "Bp", chr = "Chr", 
          genomewide = -log10(threshold), suggestiveline = FALSE, 
          logp=T, col = c("black", color_J20_TG), main = "Tissue*Pathology",
          chrlabs = c(1:19, "X", "Y"),las = 2, annotatePval = threshold,
          cex.axis = 1.5, cex.lab = 1.2)

J20 <- J20[order(J20$FDR_adj_TissuePathology),]
par(mfrow = c(2, 3))
for(i in 1:6){
  plotPathologyDMP(cpg = rownames(J20[i,]), betaResults = J20, betas = betas, pheno = pheno, 
                   colour = color_J20_TG, column= "FDR_adj_TissuePathology" )
}

J20_sig <- J20[1:length(J20[which(J20$FDR_adj_TissuePathology <= 0.05), "cpg"]),]
betas_sig <- betas[rownames(J20_sig),]
pheno_plot <- pheno[,c("Tissue","Genotype", "Age_months", "Pathology")]

pheatmap(betas_sig,  
         annotation_col = pheno_plot,
         cluster_rows = T,
         show_rownames = F,
         show_colnames = F)